driver.js ➔ ???   B
last analyzed

Complexity

Conditions 2
Paths 4

Size

Total Lines 43

Duplication

Lines 0
Ratio 0 %

Importance

Changes 10
Bugs 1 Features 0
Metric Value
cc 2
c 10
b 1
f 0
nc 4
nop 2
dl 0
loc 43
rs 8.8478

1 Function

Rating   Name   Duplication   Size   Complexity  
A driver.js ➔ ... ➔ ??? 0 1 1
1
'use strict';
2
3
const Wappalyzer = require('./wappalyzer');
4
const url = require('url');
5
const fs = require('fs');
6
const path = require('path');
7
const Browser = require('zombie');
8
9
// Application and category definitions, read synchronously once at module
// load. The path is built with path.join so it is composed with the
// platform's own separator rather than a hard-coded '/'.
const json = JSON.parse(fs.readFileSync(path.join(__dirname, 'apps.json'), 'utf8'));
10
11
// Pathnames worth crawling: either no dot anywhere (no file extension), or
// ending in one of the listed page extensions. The start anchor belongs to the
// first alternative only; anchoring the whole group would require the path to
// *be* ".php" etc., which a real pathname (leading "/") never is.
const extensions = /(^[^.]+$|\.(asp|aspx|cgi|htm|html|jsp|php)$)/;
12
13
/**
 * Drives a headless Zombie browser over one page (or, when `recursive` is
 * set, over same-host links found on it) and feeds each page's headers, HTML,
 * script URLs, JS globals and cookies to a Wappalyzer instance.
 */
class Driver {
  /**
   * @param {string} pageUrl - URL of the first page to analyse.
   * @param {Object} [options] - Overrides for the defaults listed below.
   */
  constructor(pageUrl, options) {
    this.options = Object.assign({}, {
      password: '',
      proxy: null,
      username: '',
      chunkSize: 5,
      debug: false,
      delay: 500,
      htmlMaxCols: 2000,
      htmlMaxRows: 3000,
      maxDepth: 3,
      maxUrls: 10,
      maxWait: 5000,
      recursive: false,
      userAgent: 'Mozilla/5.0 (compatible; Wappalyzer)',
    }, options || {});

    // Flags are coerced numerically (unary plus), so true, 1 and '1' enable
    // them while non-numeric strings do not.
    this.options.debug = Boolean(+this.options.debug);
    this.options.recursive = Boolean(+this.options.recursive);
    // The delay only applies when crawling recursively.
    this.options.delay = this.options.recursive ? parseInt(this.options.delay, 10) : 0;
    this.options.maxDepth = parseInt(this.options.maxDepth, 10);
    this.options.maxUrls = parseInt(this.options.maxUrls, 10);
    this.options.maxWait = parseInt(this.options.maxWait, 10);
    this.options.htmlMaxCols = parseInt(this.options.htmlMaxCols, 10);
    this.options.htmlMaxRows = parseInt(this.options.htmlMaxRows, 10);

    this.origPageUrl = url.parse(pageUrl);
    this.analyzedPageUrls = [];
    this.apps = [];
    this.meta = {};

    this.wappalyzer = new Wappalyzer();

    this.wappalyzer.apps = json.apps;
    this.wappalyzer.categories = json.categories;

    this.wappalyzer.parseJsPatterns();

    this.wappalyzer.driver.log = (message, source, type) => this.log(message, source, type);
    this.wappalyzer.driver.displayApps = (detected, meta, context) => this.displayApps(detected, meta, context);

    // NOTE(review): this registers one process-wide listener per Driver
    // instance; kept as-is because callers may rely on it.
    process.on('uncaughtException', e => this.wappalyzer.log('Uncaught exception: ' + e.message, 'driver', 'error'));
  }

  /**
   * Starts the analysis at the URL given to the constructor.
   *
   * @returns {Promise<{urls: string[], applications: Object[], meta: Object}>}
   */
  analyze() {
    const now = new Date().getTime();

    this.time = {
      start: now,
      last: now,
    };

    return this.crawl(this.origPageUrl);
  }

  /**
   * Writes a message to the console, but only when the `debug` option is on.
   *
   * @param {string} message
   * @param {string} source
   * @param {string} type
   */
  log(message, source, type) {
    if ( this.options.debug ) {
      console.log('[wappalyzer ' + type + ']', '[' + source + ']', message);
    }
  }

  /**
   * Records the detected applications (skipping names already recorded) and
   * stores the latest meta object.
   *
   * @param {Object} detected - Detected apps keyed by application name.
   * @param {Object} meta
   */
  displayApps(detected, meta) {
    this.meta = meta;

    Object.keys(detected).forEach(appName => {
      const app = detected[appName];

      // Each category is emitted as a single-key object: { <id>: <name> }.
      const categories = app.props.cats.map(id => {
        const category = {};

        category[id] = json.categories[id].name;

        return category;
      });

      if ( !this.apps.some(detectedApp => detectedApp.name === app.name) ) {
        this.apps.push({
          name: app.name,
          confidence: app.confidenceTotal.toString(),
          version: app.version,
          icon: app.props.icon || 'default.svg',
          website: app.props.website,
          categories,
        });
      }
    });
  }

  /**
   * Visits a page after an index-proportional delay, unless it was already
   * analysed or the `maxUrls` limit has been reached.
   *
   * @param {Object} pageUrl - Parsed URL (must expose `href`).
   * @param {number} index - Position within the current chunk; scales the delay.
   * @param {number} depth - Current crawl depth (used for logging only).
   * @returns {Promise<Object[]|undefined>} Same-host links found on the page,
   *   or undefined when the page was skipped or could not be analysed.
   */
  fetch(pageUrl, index, depth) {
    // Return when the URL is a duplicate or maxUrls has been reached
    if ( this.analyzedPageUrls.indexOf(pageUrl.href) !== -1 || this.analyzedPageUrls.length >= this.options.maxUrls ) {
      return Promise.resolve();
    }

    this.analyzedPageUrls.push(pageUrl.href);

    const timerScope = {
      last: new Date().getTime(),
    };

    const delay = this.options.delay * index;

    this.timer(`fetch; url: ${pageUrl.href}; depth: ${depth}; delay: ${delay}ms`, timerScope);

    return new Promise(resolve => {
      this.sleep(delay)
        .then(() => this.visit(pageUrl, timerScope, resolve))
        .catch(error => {
          // A failure while starting the visit must not leave the crawl hanging.
          this.wappalyzer.log(`${error && error.message ? error.message : error}; url: ${pageUrl.href}`, 'driver', 'error');

          resolve();
        });
    });
  }

  /**
   * Loads the page in a fresh browser, runs the analysis and calls `resolve`
   * exactly once: with the page's crawlable links on success, or with no
   * argument when the response is rejected or anything fails.
   *
   * @param {Object} pageUrl - Parsed URL (must expose `href`).
   * @param {{last: number}} timerScope - Per-fetch timing state for `timer`.
   * @param {Function} resolve - Completion callback.
   */
  visit(pageUrl, timerScope, resolve) {
    const browser = new Browser({
      proxy: this.options.proxy,
      silent: true,
      strictSSL: false,
      userAgent: this.options.userAgent,
      waitDuration: this.options.maxWait,
    });

    browser.on('authenticate', auth => {
      auth.username = this.options.username;
      auth.password = this.options.password;
    });

    // Log the problem and still complete, so callers never wait forever.
    const fail = error => {
      this.wappalyzer.log(`${error && error.message ? error.message : error}; url: ${pageUrl.href}`, 'driver', 'error');

      resolve();
    };

    this.timer('browser.visit start; url: ' + pageUrl.href, timerScope);

    browser.visit(pageUrl.href, () => {
      let analysis;

      try {
        this.timer('browser.visit end; url: ' + pageUrl.href, timerScope);

        if ( !this.responseOk(browser, pageUrl) ) {
          resolve();

          return;
        }

        const headers = this.getHeaders(browser);
        const html = this.getHtml(browser);
        const scripts = this.getScripts(browser);
        const js = this.getJs(browser);
        const cookies = this.getCookies(browser);

        analysis = this.wappalyzer.analyze(pageUrl, {
          headers,
          html,
          scripts,
          js,
          cookies,
        });
      } catch ( error ) {
        fail(error);

        return;
      }

      Promise.resolve(analysis)
        .then(() => resolve(this.getLinks(browser)))
        .catch(fail);
    });
  }

  /**
   * Collects the page's anchors that are http(s), on the original host and
   * whose pathname passes the `extensions` filter. The fragment of each
   * matching anchor is cleared before its href is parsed.
   *
   * @param {Object} browser
   * @returns {Object[]} Parsed URLs.
   */
  getLinks(browser) {
    return Array.prototype.reduce.call(
      browser.document.getElementsByTagName('a'), (results, link) => {
        if ( link.protocol && link.protocol.match(/https?:/) && link.hostname === this.origPageUrl.hostname && extensions.test(link.pathname) ) {
          link.hash = '';

          results.push(url.parse(link.href));
        }

        return results;
      }, []
    );
  }

  /**
   * Returns the first browser resource that carries a response, if any.
   *
   * @param {Object} browser
   * @returns {Object|null}
   */
  getResponseResource(browser) {
    return browser.resources.length ? browser.resources.filter(resource => resource.response)[0] || null : null;
  }

  /**
   * Checks that the page produced a 200 text/html response with a document.
   * A page skipped for its content type is removed from `analyzedPageUrls`.
   *
   * @param {Object} browser
   * @param {Object} pageUrl - Parsed URL (must expose `href`).
   * @returns {boolean} true when the page should be analysed.
   */
  responseOk(browser, pageUrl) {
    // Validate response
    const resource = this.getResponseResource(browser);

    if ( !resource ) {
      this.wappalyzer.log('No response from server; url: ' + pageUrl.href, 'driver', 'error');

      return false;
    }

    if ( resource.response.status !== 200 ) {
      this.wappalyzer.log('Response was not OK; status: ' + resource.response.status + ' ' + resource.response.statusText + '; url: ' + pageUrl.href, 'driver', 'error');

      return false;
    }

    const headers = this.getHeaders(browser);

    // Validate content type
    const contentType = Object.prototype.hasOwnProperty.call(headers, 'content-type') ? headers['content-type'][0] : null;

    if ( !contentType || !/\btext\/html\b/.test(contentType) ) {
      this.wappalyzer.log('Skipping; url: ' + pageUrl.href + '; content type: ' + contentType, 'driver');

      // Guard the index: splice(-1, 1) would drop an unrelated (last) entry.
      const position = this.analyzedPageUrls.indexOf(pageUrl.href);

      if ( position !== -1 ) {
        this.analyzedPageUrls.splice(position, 1);
      }

      return false;
    }

    // Validate document
    if ( !browser.document || !browser.document.documentElement ) {
      this.wappalyzer.log('No HTML document; url: ' + pageUrl.href, 'driver', 'error');

      return false;
    }

    return true;
  }

  /**
   * Groups the response headers of the first responding resource by name.
   *
   * @param {Object} browser
   * @returns {Object<string, string[]>} Header values keyed by header name;
   *   empty when there is no response.
   */
  getHeaders(browser) {
    const headers = {};

    const resource = this.getResponseResource(browser);

    if ( resource ) {
      resource.response.headers._headers.forEach(header => {
        const name = header[0];

        if ( !headers[name] ) {
          headers[name] = [];
        }

        headers[name].push(header[1]);
      });
    }

    return headers;
  }

  /**
   * Returns the page HTML, limited to `htmlMaxRows` lines (the first half and
   * the last half of the document when it is longer) and `htmlMaxCols`
   * characters per line. Errors are logged and yield an empty string.
   *
   * @param {Object} browser
   * @returns {string}
   */
  getHtml(browser) {
    let html = '';

    try {
      const maxRows = this.options.htmlMaxRows;
      const maxCols = this.options.htmlMaxCols;
      const lines = browser.html().split('\n');

      let kept = lines;

      // Only trim when the document really exceeds the limit; otherwise the
      // head and tail windows would overlap and duplicate lines.
      if ( Number.isFinite(maxRows) && maxRows >= 0 && lines.length > maxRows ) {
        const head = Math.ceil(maxRows / 2);
        const tail = Math.floor(maxRows / 2);

        kept = lines.slice(0, head).concat(lines.slice(lines.length - tail));
      }

      html = kept
        .map(line => line.substring(0, maxCols))
        .join('\n');
    } catch ( error ) {
      this.wappalyzer.log(error.message, 'browser', 'error');
    }

    return html;
  }

  /**
   * Lists the `src` of every script element that has one.
   *
   * @param {Object} browser
   * @returns {string[]}
   */
  getScripts(browser) {
    if ( !browser.document || !browser.document.scripts ) {
      return [];
    }

    return Array.prototype.slice
      .apply(browser.document.scripts)
      .filter(script => script.src)
      .map(script => script.src);
  }

  /**
   * Resolves every dotted property chain from the JS patterns against
   * `browser.window`, following own properties only. Strings and numbers are
   * reported as-is, anything else as a boolean; falsy results are omitted.
   *
   * @param {Object} browser
   * @returns {Object} `{ appName: { chain: { patternIndex: value } } }`.
   */
  getJs(browser) {
    const hasOwn = Object.prototype.hasOwnProperty;
    const patterns = this.wappalyzer.jsPatterns;
    const js = {};

    Object.keys(patterns).forEach(appName => {
      js[appName] = {};

      Object.keys(patterns[appName]).forEach(chain => {
        js[appName][chain] = {};

        // The looked-up value depends on the chain only, so resolve it once
        // rather than once per pattern.
        const found = chain.split('.').reduce((parent, property) => {
          return parent && hasOwn.call(parent, property) ? parent[property] : null;
        }, browser.window);

        const value = typeof found === 'string' || typeof found === 'number' ? found : Boolean(found);

        if ( !value ) {
          return;
        }

        patterns[appName][chain].forEach((pattern, index) => {
          js[appName][chain][index] = value;
        });
      });
    });

    return js;
  }

  /**
   * Copies name, value, domain and path of each browser cookie.
   *
   * @param {Object} browser
   * @returns {Object[]}
   */
  getCookies(browser) {
    const cookies = [];

    if ( browser.cookies ) {
      browser.cookies.forEach(cookie => cookies.push({
        name: cookie.key,
        value: cookie.value,
        domain: cookie.domain,
        path: cookie.path,
      }));
    }

    return cookies;
  }

  /**
   * Fetches one page and, when `recursive` is on and `maxDepth` is not yet
   * reached, crawls up to `maxUrls` of the links found on it.
   *
   * @param {Object} pageUrl - Parsed URL; gains a `canonical` property.
   * @param {number} [index=1]
   * @param {number} [depth=1]
   * @returns {Promise<{urls: string[], applications: Object[], meta: Object}>}
   */
  crawl(pageUrl, index = 1, depth = 1) {
    pageUrl.canonical = pageUrl.protocol + '//' + pageUrl.host + pageUrl.pathname;

    // Wrapping the call turns a synchronous throw in fetch() into a rejection
    // that is handled below like any other fetch failure.
    return new Promise(resolve => resolve(this.fetch(pageUrl, index, depth)))
      .catch(error => {
        // Best effort: a failed page is logged and the crawl carries on.
        this.wappalyzer.log(`${error && error.message ? error.message : error}; url: ${pageUrl.href}`, 'driver', 'error');
      })
      .then(links => {
        if ( links && this.options.recursive && depth < this.options.maxDepth ) {
          return this.chunk(links.slice(0, this.options.maxUrls), depth + 1);
        }

        return undefined;
      })
      .then(() => ({
        urls: this.analyzedPageUrls,
        applications: this.apps,
        meta: this.meta,
      }));
  }

  /**
   * Crawls `links` in consecutive batches of `chunkSize`; the links within a
   * batch are crawled concurrently. Consumes (empties) the `links` array.
   *
   * @param {Object[]} links - Parsed URLs.
   * @param {number} depth
   * @param {number} [chunk=0] - Batch counter.
   * @returns {Promise<undefined>}
   */
  chunk(links, depth, chunk = 0) {
    if ( links.length === 0 ) {
      return Promise.resolve();
    }

    // A size below 1 would remove nothing from `links` and recurse forever.
    const size = Math.max(1, parseInt(this.options.chunkSize, 10) || 1);

    const chunked = links.splice(0, size);

    return Promise.all(chunked.map((link, index) => this.crawl(link, index, depth)))
      .then(() => this.chunk(links, depth, chunk + 1));
  }

  /**
   * @param {number} ms
   * @returns {Promise<undefined>} Resolves after `ms` milliseconds, or
   *   without a timer when `ms` is falsy.
   */
  sleep(ms) {
    return ms ? new Promise(resolve => setTimeout(resolve, ms)) : Promise.resolve();
  }

  /**
   * Logs `message` with the time elapsed since `scope.last` and since the
   * start of the analysis, then updates `scope.last`.
   *
   * @param {string} message
   * @param {{last: number}} scope
   */
  timer(message, scope) {
    const time = new Date().getTime();
    // this.time is only set by analyze(); fall back to "now" when crawl() or
    // fetch() is used directly.
    const start = this.time ? this.time.start : time;
    const sinceStart = ( Math.round(( time - start ) / 10) / 100) + 's';
    const sinceLast = ( Math.round(( time - scope.last ) / 10) / 100) + 's';

    this.wappalyzer.log('[timer] ' + message + '; lapsed: ' + sinceLast + ' / ' + sinceStart, 'driver');

    scope.last = time;
  }
}
352
353
module.exports = Driver;
354